Gastos_casa %>%
dplyr::select(-Tiempo,-link) %>%
dplyr::select(fecha, gasto, monto, gastador,obs) %>% tail(30) %>%
knitr::kable(format = "markdown", size=12)
| fecha | gasto | monto | gastador | obs |
|---|---|---|---|---|
| 8/11/2025 | Comida | 101773 | Tami | Supermercado |
| 8/11/2025 | Comida | 20000 | Andrés | lo saldes tami |
| 15/11/2025 | Comida | 74078 | Tami | Supermercado |
| 17/11/2025 | VTR | 22000 | Andrés | NA |
| 17/11/2025 | Electricidad | 42000 | Andrés | NA |
| 22/11/2025 | Comida | 6800 | Tami | Helado/Galleta Valpo |
| 23/11/2025 | Comida | 48730 | Tami | Almuerzo Valpo |
| 23/11/2025 | Comida | 9990 | Tami | Cafetería/Sanguche Valpo |
| 24/11/2025 | Transporte | 5195 | Tami | Uber Reñaca-Terminal Buses Viña |
| 24/11/2025 | Transporte | 2675 | Tami | Uber Depto-Mcdonald Reñaca |
| 22/11/2025 | Transporte | 8193 | Tami | Uber Viña-Depto Reñaca |
| 22/11/2025 | Transporte | 9766 | Tami | Uber depto Reñaca-Valpo |
| 24/11/2025 | Transporte | 10000 | Andrés | condor |
| 24/11/2025 | Comida | 38290 | Andrés | barra central |
| 23/11/2025 | Comida | 17200 | Andrés | NA |
| 23/11/2025 | Comida | 18766 | Andrés | NA |
| 20/11/2025 | Comida | 22258 | Andrés | lider |
| 24/11/2025 | Comida | 55094 | Tami | Supermercado |
| 26/11/2025 | Comida | 50000 | Andrés | piwen |
| 29/11/2025 | Comida | 108909 | Tami | Supermercado |
| 4/12/2025 | Agua | 21244 | Andrés | NA |
| 8/12/2025 | Comida | 81926 | Tami | Supermercado |
| 13/12/2025 | Comida | 74439 | Tami | Supermercado |
| 20/12/2025 | Comida | 74418 | Tami | Supermercado |
| 23/12/2025 | Otros | 10000 | Andrés | basureros navidad |
| 24/12/2025 | Electricidad | 44000 | Andrés | NA |
| 24/12/2025 | Agua | 22000 | Andrés | NA |
| 28/12/2025 | VTR | 22000 | Andrés | NA |
| 31/3/2019 | Comida | 9000 | Andrés | NA |
| 8/9/2019 | Comida | 24588 | Andrés | Super Lider |
#para ver las diferencias depués de la diosi
Gastos_casa %>%
dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>%
dplyr::mutate(fecha=strftime(fecha, format = "%Y-W%V")) %>%
dplyr::mutate(gastador=ifelse(gastador=="Andrés",1,0)) %>%
dplyr::group_by(gastador, fecha,.drop = F) %>%
dplyr::summarise(gasto_media=mean(monto,na.rm=T)) %>%
dplyr::mutate(treat=ifelse(fecha>"2019-W26",1,0)) %>%
#dplyr::mutate(fecha_simp=lubridate::week(fecha)) %>%#después de diosi. Junio 24, 2019
dplyr::mutate(gastador_nombre=plyr::revalue(as.character(gastador), c("0" = "Tami", "1"="Andrés"))) %>%
assign("ts_gastos_casa_week_treat", ., envir = .GlobalEnv)
gplots::plotmeans(gasto_media ~ gastador_nombre, main="Promedio de gasto por gastador", data=ts_gastos_casa_week_treat,ylim=c(0,75000), xlab="", ylab="")
library(ggplot2)
ggplot(
ts_gastos_casa_week_treat,
aes(x = gastador_nombre, y = gasto_media)
) +
stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) +
facet_wrap(~ treat, labeller = labeller(
treat = c(`0` = "Antes de Diosi", `1` = "Después de Diosi")
)) +
coord_cartesian(ylim = c(0, 70000)) +
labs(x = "", y = "") +
theme_minimal(base_size = 13) +
theme(
strip.text = element_text(face = "bold"),
axis.text.x = element_text(angle = 45, hjust = 1)
)
library(ggiraph)
library(scales)
#if( requireNamespace("dplyr", quietly = TRUE)){
gg <- Gastos_casa %>%
dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>%
dplyr::mutate(gastador=ifelse(gastador=="Andrés",1,0)) %>%
dplyr::mutate(fecha_simp=tsibble::yearweek(fecha)) %>%
dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
dplyr::mutate(treat=ifelse(fecha_week>"2019 W26",1,0)) %>%
dplyr::mutate(gastador_nombre=plyr::revalue(as.character(gastador), c("0" = "Tami", "1"="Andrés"))) %>%
# dplyr::mutate(week=as.Date(as.character(lubridate::floor_date(fecha, "week"))))%>%
#dplyr::mutate(fecha_week= lubridate::parse_date_time(fecha_week, c("%Y-W%V"),exact=T)) %>%
dplyr::group_by(gastador_nombre, fecha_simp) %>%
dplyr::summarise(monto_total=sum(monto)) %>%
dplyr::mutate(tooltip= paste0(substr(gastador_nombre,1,1),"=",round(monto_total/1000,2))) %>%
ggplot(aes(hover_css = "fill:none;")) +#, ) +
#stat_summary(geom = "line", fun.y = median, size = 1, alpha=0.5, aes(color="blue")) +
geom_line(aes(x = fecha_simp, y = monto_total, color=as.factor(gastador_nombre)),size=1,alpha=.5) +
ggiraph::geom_point_interactive(aes(x = fecha_simp, y = monto_total, color=as.factor(gastador_nombre),tooltip=tooltip),size = 1) +
#geom_text(aes(x = fech_ing_qrt, y = perc_dup-0.05, label = paste0(n)), vjust = -1,hjust = 0, angle=45, size=3) +
# guides(color = F)+
theme_custom() +
geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
labs(y="Gastos (en miles)",x="Semanas y Meses", subtitle="Interlineado, incorporación de la Diosi; Azul= Tami; Rojo= Andrés") + ggtitle( "Figura 4. Gastos por Gastador") +
scale_y_continuous(labels = f <- function(x) paste0(x/1000)) +
scale_color_manual(name = "Gastador", values= c("blue", "red"), labels = c("Tami", "Andrés")) +
tsibble::scale_x_yearweek(date_breaks = "1 month", minor_breaks = "1 week", labels=scales::date_format("%m/%y")) +
theme(axis.text.x = element_text(vjust = 0.5,angle = 35), legend.position='bottom')+
theme(
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black")
)
# x <- girafe(ggobj = gg)
# x <- girafe_options(x = x,
# opts_hover(css = "stroke:red;fill:orange") )
# if( interactive() ) print(x)
#}
tooltip_css <- "background-color:gray;color:white;font-style:italic;padding:10px;border-radius:10px 20px 10px 20px;"
#ggiraph(code = {print(gg)}, tooltip_extra_css = tooltip_css, tooltip_opacity = .75 )
x <- girafe(ggobj = gg)
x <- girafe_options(x,
opts_zoom(min = 1, max = 3), opts_hover(css =tooltip_css))
x
plot<-Gastos_casa %>%
dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>%
dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
dplyr::mutate(month=as.Date(as.character(lubridate::floor_date(fecha, "month"))))%>%
dplyr::group_by(month)%>%
dplyr::summarise(gasto_total=sum(monto)/1000) %>%
ggplot2::ggplot(aes(x = month, y = gasto_total)) +
geom_point()+
geom_line(size=1) +
theme_custom() +
geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
geom_vline(xintercept = as.Date("2019-03-23"),linetype = "dashed", color="red") +
labs(y="Gastos (en miles)",x="Meses/Año", subtitle="Interlineado, incorporación de la Diosi") +
ggtitle( "Figura. Suma de Gastos por Mes") +
scale_x_date(breaks = "1 month", minor_breaks = "1 month", labels=scales::date_format("%m/%y")) +
theme(axis.text.x = element_text(vjust = 0.5,angle = 45))
plotly::ggplotly(plot)
plot2<-Gastos_casa %>%
dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>%
dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
dplyr::mutate(day=as.Date(as.character(lubridate::floor_date(fecha, "day"))))%>%
dplyr::group_by(day)%>%
summarise(gasto_total=sum(monto)/1000) %>%
ggplot2::ggplot(aes(x = day, y = gasto_total)) +
geom_line(size=1) +
theme_custom() +
geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
geom_vline(xintercept = as.Date("2020-03-23"),linetype = "dashed", color="red") +
labs(y="Gastos (en miles)",x="Meses/Año", subtitle="Interlineado, incorporación de la Diosi") +
ggtitle( "Figura. Suma de Gastos por Día") +
scale_x_date(breaks = "1 month", minor_breaks = "1 week", labels=scales::date_format("%m/%y")) +
theme(axis.text.x = element_text(vjust = 0.5,angle = 45))
plotly::ggplotly(plot2)
tsData <- Gastos_casa %>%
dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>%
dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
dplyr::mutate(day=as.Date(as.character(lubridate::floor_date(fecha, "day"))))%>%
dplyr::group_by(day)%>%
summarise(gasto_total=sum(monto))%>%
dplyr::mutate(covid=case_when(day>as.Date("2019-06-02")~1,TRUE~0))%>%
dplyr::mutate(covid=case_when(day>as.Date("2020-03-10")~covid+1,TRUE~covid))%>%
dplyr::mutate(covid=as.factor(covid))%>%
data.frame()
tsData_gastos <-ts(tsData$gasto_total, frequency=7)
mstsData_gastos <- forecast::msts(Gastos_casa$monto, seasonal.periods=c(7,30))
tsData_gastos = decompose(tsData_gastos)
tsdata_gastos_trend<-cbind(tsData,trend=as.vector(tsData_gastos$trend))%>% na.omit()
# Assuming your time series starts on "2019-03-03"
start_date <- as.Date("2019-03-03")
frequency <- 7 # Weekly data
num_periods <- length(tsData_gastos$x) # Total number of periods in your time series
# Generate sequence of dates
dates <- tsData$day# seq.Date(from = start_date, by = "day", length.out = num_periods)
# Create a data frame from the decomposed time series object
tsData_gastos_df <- data.frame(
day = dates,
Actual = as.numeric(tsData_gastos$x),
Seasonal = as.numeric(tsData_gastos$seasonal),
Trend = as.numeric(tsData_gastos$trend),
Random = as.numeric(tsData_gastos$random)
)
tsData_gastos_long <- tsData_gastos_df %>%
pivot_longer(cols = c("Actual", "Seasonal", "Trend", "Random"),
names_to = "Component", values_to = "Value")
# Plotting with facet_wrap
ggplot(tsData_gastos_long, aes(x = day, y = Value)) +
geom_line() +
theme_bw() +
labs(title = "Descomposición de los Gastos Diarios", x = "Date", y = "Value") +
scale_x_date(date_breaks = "3 months", date_labels = "%m %Y") +
facet_wrap(~ Component, scales = "free_y", ncol=1) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
theme(strip.text = element_text(size = 12))
Ahora con las tendencias descompuestas
#require(zoo)
Gastos_casa_gastador_cat_gasto<-
Gastos_casa %>%
dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>%
dplyr::mutate(fecha2=strftime(fecha, format = "%Y-W%V")) %>%
dplyr::mutate(gastador=ifelse(gastador=="Andrés",1,0)) %>%
dplyr::mutate(treat=ifelse(fecha2>"2019-W26",1,0)) %>%
dplyr::mutate(
# 1) Normalización básica de texto
gasto_clean = gasto %>%
stringr::str_squish() %>% # espacios extra
stringr::str_to_lower() %>% # todo en minúsculas
stringr::str_replace_all(",", "") %>% # quita comas, si hubiera
stringr::str_replace_all("\\s+/\\s+", "/") %>% # "a / b" -> "a/b"
stringi::stri_trans_general("Latin-ASCII"), # sin tildes
# 2) Unificar etiquetas que son lo mismo
gasto_clean = dplyr::case_when(
gasto_clean %in% c("uber", "uber reñaca", "uber matri cony", "uber cumple papa") ~ "uber",
gasto_clean %in% c("yaz", "yaz ") ~ "yaz",
gasto_clean %in% c("limpieza alfombras", "limpieza alfombra") ~ "limpieza alfombras",
gasto_clean %in% c("vacuna influenza", "vacunas influenza") ~ "vacuna influenza",
gasto_clean %in% c("aspiradora", "aspiradora ") ~ "aspiradora",
gasto_clean %in% c("plata reciclaje y basurero",
"plata basurero", "plata basureros",
"plata fiestas patrias basureros",
"aporte basureros", "basureros") ~ "basureros",
gasto_clean %in% c("donaciones", "donacion") ~ "donacion",
gasto_clean %in% c("prestamo", "prestamo andres", "prestamo andres", "prestamo andres ") ~ "prestamo andres",
TRUE ~ gasto_clean
),
# 3) (Opcional) Variable más agregada de tipo de gasto
gasto_cat = dplyr::case_when(
gasto_clean == "comida" ~ "Comida",
gasto_clean %in% c("enceres") ~ "Enceres",
gasto_clean == "diosi" ~ "Diosi",
gasto_clean %in% c("vtr", "netflix",
"crunchyroll", "entel") ~ "Streaming/Telefonia",
gasto_clean %in% c("electricidad", "gas",
"parafina", "kerosen") ~ "Servicios básicos",
gasto_clean %in% c("agua") ~ "Agua",
gasto_clean %in% c("uber", "transporte",
"gas/bencina", "bencina + tag",
"bencina reñaca",
"bencina + peajes maite",
"bencina + tag cumple delox") ~ "Transporte",
gasto_clean %in% c("farmacia", "remedio",
"remedios", "remedios covid",
"gaviscón y paracetamol",
"nexium") ~ "Farmacia/Salud",
gasto_clean %in% c("electrodomesticos/mantencion casa",
"microondas regalo", "aspiradora",
"muebles ratan", "mouse",
"reloj") ~ "Electrodomesticos",
TRUE ~ stringr::str_to_title(gasto_clean) # default: limpio pero no agregado
)
) %>%
dplyr::mutate(
gasto_cat10 = dplyr::case_when(
# 1) COMIDA (incluye los “comida” especiales de tu criterio viejo)
gasto_cat %in% c(
"Comida",
"Pan Pepperino",
"Cookidoo",
"Granola Wild Foods",
"Wild Protein",
"Brussels"
) ~ "Comida",
# 2) ENCERES
gasto_cat %in% c(
"Enceres",
"Incoludido",
"Tres Toques"
) ~ "Enceres",
# 3) DIOSI
gasto_cat == "Diosi" ~ "Diosi",
# 4) SERVICIOS BÁSICOS (agua, luz, gas/bencina, etc.)
gasto_cat %in% c(
"Servicios básicos",
"Agua",
"Bencina Renaca"
) ~ "Servicios básicos",
# 5) FARMACIA (todo lo que en tu criterio iba a Farmacia)
gasto_cat %in% c(
"Farmacia/Salud",
"Yaz",
"Gaviscon Y Paracetamol",
"Vacuna Influenza",
"Cruz Verde"
) ~ "Farmacia",
# 6) TRANSPORTE (uber, viajes, bencina que antes era Transporte)
gasto_cat %in% c(
"Transporte",
"Uber Renaca",
"Viaje Brasil"
) ~ "Transporte",
# 7) ELECTRODOMÉSTICOS / MANTENCIÓN CASA
gasto_cat %in% c(
"Electrodomesticos",
"Cortina Bano",
"Filtro Agua",
"Filtro Piscina Mspa",
"Mantencion Toyotomi",
"Limpieza Alfombras",
"Sopapo",
"Pila Estufa",
"Jardinero",
"Camaras Seguridad M.barrios",
"Pago Camaras Mb",
"Tina",
"Nacho",
"Lamina",
"Chromecast",
"Easy"
) ~ "Electrodomesticos/mantencion casa",
# 8) STREAMING / TELEFONÍA (Netflix / VTR / Crunchyroll / Entel, que ya aglutinaste en gasto_cat)
gasto_cat %in% c(
"Streaming/Telefonia"
) ~ "Streaming/Telefonia",
# 9) DONACIONES / REGALOS / APORTES
gasto_cat %in% c(
"Basureros",
"Donacion",
"Regalo Chocolates",
"Regalo Matri Chepa",
"Regalo Matri Cony",
"Matri Andres Kogan",
"Rgalo Chepa",
"Ropa",
"Ropa Tami",
"Assistcard Viaje"
) ~ "Donaciones/regalos",
# 10) TODO LO DEMÁS → OTROS
TRUE ~ "Otros"
)
) %>%
dplyr::group_by(gastador, fecha, gasto_cat10, .drop=F) %>%
dplyr::summarise(monto=sum(monto)) %>%
dplyr::mutate(gastador_nombre=plyr::revalue(as.character(gastador), c("0" = "Tami", "1"="Andrés")))
Gastos_casa_gastador_cat_gasto %>%
dplyr::mutate(fecha = as.Date(fecha)) %>% # por si viene como POSIXct
dplyr::group_by(gasto_cat10) %>% # opcional: quita ítems con 1 obs
dplyr::filter(n() >= 6) %>%
dplyr::ungroup() %>%
ggplot2::ggplot(aes(x = fecha, y = monto, color=as.factor(gastador_nombre))) +
#stat_summary(geom = "line", fun.y = median, size = 1, alpha=0.5, aes(color="blue")) +
geom_line(size=1) +
facet_grid(gasto_cat10~.)+
geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
labs(y="Gastos (en miles)",x="Años", subtitle="Interlineado, incorporación de la Diosi; Azul= Tami; Rojo= Andrés") +
ggtitle( "Figura 6. Gastos Semanales por Gastador e ítem (media) [n>5]") +
scale_y_continuous(labels = f <- function(x) paste0(x/1000)) +
scale_color_manual(name = "Gastador", values= c("blue", "red"), labels = c("Tami", "Andrés")) +
tsibble::scale_x_yearweek(breaks = "1 year", minor_breaks = "3 months", labels= scales::date_format("%m/%y")) +
guides(color = F)+
theme_custom() +
theme(axis.text.x = element_text(vjust = 0.5,angle = 35)) +
theme(
panel.border = element_blank(),
#panel.grid.major = element_blank(),
#panel.grid.minor = element_blank(),
panel.grid.minor = element_line(color = "grey90", size = 0.3),
panel.grid.major = element_line(color = "grey75", size = 0.5),
axis.line = element_line(colour = "black")
)
# Apply MSTL decomposition
Gastos_casa |>
dplyr::mutate(fecha= readr::parse_date(fecha, format="%d/%m/%Y")) |>
dplyr::arrange(fecha) |> pull(monto) -> monto_ts
Gastos_casa |>
dplyr::mutate(fecha= readr::parse_date(fecha, format="%d/%m/%Y")) |>
dplyr::arrange(fecha) |> pull(fecha) -> fecha_ts
mstl_data_autplt <- forecast::mstl(monto_ts, lambda = "auto",iterate=5000000,start =
lubridate::decimal_date(as.Date("2019-03-03")))
# Convert the decomposed time series to a data frame
mstl_df <- data.frame(
Date = as.Date(fecha_ts, format="%d/%m/%Y"),
Data = as.numeric(mstl_data_autplt[, "Data"]),
Trend = as.numeric(mstl_data_autplt[, "Trend"]),
Remainder = as.numeric(mstl_data_autplt[, "Remainder"])
)
# Reshape the data frame for ggplot2
mstl_long <- mstl_df %>%
dplyr::arrange(Date) %>%
tidyr::pivot_longer(cols = -Date, names_to = "Component", values_to = "Value")
mstl_long_filtered <- mstl_long %>% dplyr::filter(!(Date %in% (mstl_long %>% dplyr::distinct(Date) %>% top_n(7, Date) %>% pull(Date))))
# Plotting with ggplot2
ggplot(mstl_long_filtered, aes(x = Date, y = Value)) +
geom_line() +
theme_bw() +
labs(title = "Descomposición MSTL (- 7 days)", x = "Fecha", y = "Valor") +
scale_x_date(date_breaks = "3 months", date_labels = "%m-%Y") +
facet_wrap(~ Component, scales = "free_y", ncol = 1) +
theme(strip.text = element_text(size = 12),
axis.text.x = element_text(angle = 90, hjust = 1))
ts_week_covid<-
Gastos_casa %>%
dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>%
dplyr::mutate(fecha_week=strftime(fecha, format = "%Y-W%V")) %>%
dplyr::mutate(day=as.Date(as.character(lubridate::floor_date(fecha, "day"))))%>%
dplyr::group_by(fecha_week)%>%
dplyr::summarise(gasto_total=sum(monto,na.rm=T)/1000,min_day=min(day))%>%
dplyr::ungroup() %>%
dplyr::mutate(covid=dplyr::case_when(min_day>=as.Date("2020-03-17")~1,TRUE~0))%>%
dplyr::mutate(covid=as.factor(covid))%>%
data.frame()
ts_week_covid$gasto_total_na<-ts_week_covid$gasto_total
post_resp<-ts_week_covid$gasto_total[which(ts_week_covid$covid==1)]
ts_week_covid$gasto_total_na[which(ts_week_covid$covid==1)]<-NA
ts_week_covid$gasto_total[which(ts_week_covid$covid==0)]
## [1] 98.357 4.780 56.784 50.506 64.483 67.248 49.299 35.786 58.503
## [10] 64.083 20.148 73.476 127.004 81.551 69.599 134.446 58.936 26.145
## [19] 129.927 104.989 130.860 81.893 95.697 64.579 303.471 151.106 49.275
## [28] 76.293 33.940 83.071 119.512 20.942 58.055 71.728 44.090 33.740
## [37] 59.264 77.410 60.831 63.376 48.754 235.284 29.604 115.143 72.419
## [46] 5.980 80.063 149.178 69.918 107.601 72.724 63.203 99.681 130.309
## [55] 195.898 112.066
# 1) Create corpus
corpus <- tm::Corpus(tm::VectorSource(Gastos_casa$obs))
# 2) Preprocess text
d <- corpus |>
tm::tm_map(tm::content_transformer(tolower)) |>
tm::tm_map(tm::stripWhitespace) |>
tm::tm_map(tm::removePunctuation) |>
tm::tm_map(tm::removeNumbers) |>
tm::tm_map(tm::removeWords, tm::stopwords("spanish")) |>
tm::tm_map(tm::removeWords, "menos")
# 3) Term-document matrix
tdm <- tm::TermDocumentMatrix(d)
# 4) Convert to matrix
m <- base::as.matrix(tdm)
# 5) Frequencies
v <- base::sort(base::rowSums(m), decreasing = TRUE)
df <- base::data.frame(
word = base::names(v),
freq = v,
row.names = NULL
)
# 6) Wordcloud
wordcloud::wordcloud(
words = df$word,
freq = df$freq,
max.words = 100,
random.order= FALSE,
rot.per = 0.35,
colors = RColorBrewer::brewer.pal(8, "Dark2"),
scale = c(4, 0.5)
)
fit_month_gasto <- Gastos_casa_gastador_cat_gasto %>%
dplyr::ungroup() %>%
dplyr::mutate(
# month string YYYY-MM
fecha_month = base::format(fecha, "%Y-%m")
) %>%
dplyr::mutate(
# order months from 2019-03 until today (like in your original code)
fecha_month = base::factor(
fecha_month,
levels = base::format(
base::seq(
from = base::as.Date("2019-03-03"),
to = base::as.Date(base::substr(base::Sys.time(), 1, 10)),
by = "1 month"
),
"%Y-%m"
)
)
) %>%
dplyr::group_by(fecha_month, gasto_cat10, .drop = FALSE) %>%
dplyr::summarise(
gasto_total = base::sum(monto, na.rm = TRUE) / 1000,
.groups = "drop"
) %>%
base::as.data.frame()
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
current_ym <- base::format(base::Sys.Date(), "%Y-%m")
tabla_gasto_year_item <-
fit_month_gasto %>%
dplyr::mutate(
year = base::substr(base::as.character(fecha_month), 1, 4),
ym = base::as.character(fecha_month)
) %>%
# opcional: excluir el mes actual (incompleto)
dplyr::filter(ym != current_ym) %>%
# si quieres solo ciertos años:
# dplyr::filter(year %in% c("2020","2021","2022","2023","2024","2025")) %>%
dplyr::group_by(gasto_cat10, year) %>%
dplyr::summarise(
gasto_prom = base::mean(gasto_total, na.rm = TRUE),
.groups = "drop"
) %>%
tidyr::pivot_wider(
names_from = year,
values_from = gasto_prom
) %>%
janitor::adorn_totals() # agrega fila "Total"
tabla_gasto_year_item |>
knitr::kable("markdown",
caption = "Gasto Promedio Mensual por Ítem y Año (suma por categoría y mes)",
digits = 1)
| gasto_cat10 | 2019 | 2020 | 2021 | 2022 | 2023 | 2024 | 2025 |
|---|---|---|---|---|---|---|---|
| Comida | 208.4 | 392.9 | 317.9 | 312.4 | 366.0 | 326.9 | 363.2 |
| Diosi | 84.2 | 74.2 | 79.0 | 53.5 | 74.4 | 50.0 | 50.8 |
| Donaciones/regalos | NA | 34.2 | 86.0 | 49.3 | 45.0 | 34.0 | NA |
| Electrodomesticos/mantencion casa | NA | 57.8 | 196.4 | 27.6 | 43.9 | 63.4 | 42.7 |
| Enceres | 62.7 | 78.0 | 35.5 | 36.3 | 36.5 | 48.0 | 14.6 |
| Farmacia | NA | 33.7 | 33.6 | 17.0 | 32.1 | NA | NA |
| Otros | NA | NA | 90.0 | 60.0 | 5.2 | 229.5 | 18.3 |
| Servicios básicos | 73.2 | 57.4 | 73.4 | 105.6 | 85.7 | 129.3 | 95.9 |
| Streaming/Telefonia | 22.1 | 29.8 | 26.1 | 32.3 | 25.9 | 26.3 | 22.0 |
| Transporte | NA | NA | NA | 26.4 | 32.9 | 242.2 | 35.8 |
| Total | 450.6 | 758.1 | 938.0 | 720.4 | 747.7 | 1149.5 | 643.2 |
Saqué la UF proyectada
#options(max.print=5000)
uf18 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2018.htm")%>% rvest::html_nodes("table")
uf19 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2019.htm")%>% rvest::html_nodes("table")
uf20 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2020.htm")%>% rvest::html_nodes("table")
uf21 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2021.htm")%>% rvest::html_nodes("table")
uf22 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2022.htm")%>% rvest::html_nodes("table")
uf23 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2023.htm")%>% rvest::html_nodes("table")
uf24 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2024.htm")%>% rvest::html_nodes("table")
tryCatch(uf25 <-rvest::read_html("https://www.sii.cl/valores_y_fechas/uf/uf2025.htm")%>% rvest::html_nodes("table"),
error = function(c) {
uf24b <<- cbind.data.frame(Día=NA, variable=NA, value=NA)
}
)
tryCatch(uf25 <-uf25[[length(uf25)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1),
error = function(c) {
uf25 <<- cbind.data.frame(Día=NA, variable=NA, value=NA)
}
)
uf_serie<-
bind_rows(
cbind.data.frame(anio= 2018, uf18[[length(uf18)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2019, uf19[[length(uf19)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2020, uf20[[length(uf20)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2021, uf21[[length(uf21)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2022, uf22[[length(uf22)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2023, uf23[[length(uf23)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2024, uf23[[length(uf24)]] %>% rvest::html_table() %>% data.frame() %>% reshape2::melt(id.vars=1)),
cbind.data.frame(anio= 2025, uf25)
)
uf_serie_corrected<-
uf_serie %>%
dplyr::mutate(month=plyr::revalue(tolower(.[[3]]),c("ene" = 1, "feb"=2, "mar"=3, "abr"=4, "may"=5, "jun"=6, "jul"=7, "ago"=8, "sep"=9, "oct"=10, "nov"=11, "dic"=12))) %>%
dplyr::mutate(value=stringr::str_trim(value), value= sub("\\.","",value),value= as.numeric(sub("\\,",".",value))) %>%
dplyr::mutate(date=paste0(sprintf("%02d", .[[2]])," ",sprintf("%02d",as.numeric(month)),", ",.[[1]]), date3=lubridate::parse_date_time(date,c("%d %m, %Y"),exact=T),date2=date3) %>%
na.omit()#%>% dplyr::filter(is.na(date3))
## Warning: There was 1 warning in `dplyr::mutate()`.
## i In argument: `date3 = lubridate::parse_date_time(date, c("%d %m, %Y"), exact
## = T)`.
## Caused by warning:
## ! 54 failed to parse.
#Day of the month as decimal number (1–31), with a leading space for a single-digit number.
#Abbreviated month name in the current locale on this platform. (Also matches full name on input: in some locales there are no abbreviations of names.)
warning(paste0("number of observations:",nrow(uf_serie_corrected),", min uf: ",min(uf_serie_corrected$value),", min date: ",min(uf_serie_corrected $date3 )))
## Warning: number of observations:2921, min uf: 26799.01, min date: 2018-01-01
#
# uf_proyectado <- readxl::read_excel("uf_proyectado.xlsx") %>% dplyr::arrange(Período) %>%
# dplyr::mutate(Período= as.Date(lubridate::parse_date_time(Período, c("%Y-%m-%d"),exact=T)))
ts_uf_proy<-
ts(data = uf_serie_corrected$value,
start = as.numeric(as.Date("2018-01-01")),
end = as.numeric(as.Date(uf_serie_corrected$date3[length(uf_serie_corrected$date3)])), frequency = 1,
deltat = 1, ts.eps = getOption("ts.eps"))
fit_tbats <- forecast::tbats(ts_uf_proy)
fr_fit_tbats<-forecast::forecast(fit_tbats, h=298)
# Configurar API Key
if(nchar(Sys.getenv("API_NIXTLA"))<2){
Sys.setenv("API_NIXTLA"=as.character(read.table(paste0(dirname(getwd()),"/nixtlar_api_key.txt"), quote="\"", comment.char="")))
}
if(nchar(Sys.getenv("API_NIXTLA"))<2){
try(Sys.setenv("API_NIXTLA"=as.character(read.table(paste0(gsub("/gastos","",dirname(rstudioapi::getActiveDocumentContext()$path)),"/nixtlar_api_key.txt"), quote="\"", comment.char=""))))
}
try(nixtlar::nixtla_set_api_key(Sys.getenv("NIXTLA")))
## API key has been set for the current session.
if(nchar(Sys.getenv("NIXTLA"))<2){
nixtlar::nixtla_set_api_key(Sys.getenv("API_NIXTLA"))
}
## API key has been set for the current session.
# Preparar datos en formato requerido por TimeGPT
uf_timegpt <- uf_serie_corrected %>%
dplyr::rename(ds = date3, y = value) %>%
dplyr::mutate(ds = format(ds, "%Y-%m-%d")) %>%
dplyr::mutate(unique_id = "serie_1")%>%
dplyr::select(unique_id, ds, y)
# Realizar pronóstico con TimeGPT
timegpt_fcst <- nixtlar::nixtla_client_forecast(
uf_timegpt,
h = 298, # 298 días a pronosticar
freq = "D", # Frecuencia diaria
add_history = FALSE, # Incluir datos históricos en el output
level = c(80,95),
model= "timegpt-1-long-horizon",
clean_ex_first = TRUE
)
## The specified horizon h exceeds the model horizon. This may lead to less accurate forecasts. Please consider using a smaller horizon.
# The Conflict: The API endpoint for the long-horizon model likely does not support generating "fitted values" for the historical input data, causing the server to return "Unprocessable Entity" (422).
# 1. Convertir 'ds' a fecha en ambas tablas
uf_timegpt <- uf_timegpt %>%
mutate(ds = as.Date(ds))
timegpt_fcst <- timegpt_fcst %>%
mutate(ds = as.Date(ds))
# 2. Combinar los datos históricos y el pronóstico
full_data <- bind_rows(
uf_timegpt %>% mutate(type = "Histórico"),
timegpt_fcst %>% mutate(type = "Pronóstico")
)
# Ensure dates are Date objects
uf_timegpt <- uf_timegpt %>% mutate(ds = as.Date(ds))
timegpt_fcst <- timegpt_fcst %>% mutate(ds = as.Date(ds))
ggplot() +
# --- FORECAST LAYERS (Map y to 'TimeGPT') ---
# 95% Confidence Interval
geom_ribbon(data = timegpt_fcst,
aes(x = ds, ymin = `TimeGPT-lo-95`, ymax = `TimeGPT-hi-95`),
fill = "#4B9CD3", alpha = 0.2) +
# 80% Confidence Interval
geom_ribbon(data = timegpt_fcst,
aes(x = ds, ymin = `TimeGPT-lo-80`, ymax = `TimeGPT-hi-80`),
fill = "#4B9CD3", alpha = 0.3) +
# Forecast Line
geom_line(data = timegpt_fcst,
aes(x = ds, y = TimeGPT, color = "Pronóstico"), size = 1) +
# --- HISTORICAL LAYER (Map y to 'y') ---
geom_line(data = uf_timegpt,
aes(x = ds, y = y, color = "Histórico"), size = 1) +
# --- STYLING ---
geom_vline(xintercept = max(uf_timegpt$ds),
linetype = "dashed", color = "red", size = 0.8) +
scale_x_date(date_breaks = "3 months", date_labels = "%b %Y") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
scale_color_manual(name = "Leyenda",
values = c("Histórico" = "black", "Pronóstico" = "#4B9CD3")) +
# Configuración del eje x
scale_x_date(
date_breaks = "3 months", # Reduce la frecuencia de las etiquetas
date_labels = "%b %Y", # Formato de etiquetas (mes y año)
) +
# Configuración del eje y
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
# Configuración de colores
scale_color_manual(
name = "Leyenda",
values = c("Histórico" = "black", "Pronóstico" = "#4B9CD3")
) +
# Títulos y subtítulos
labs(
title = "Pronóstico de UF, Serie Temporal con TimeGPT",
subtitle = "Intervalos de confianza al 80% (más oscuro) y 95% (más claro)",
x = "Fecha",
y = "Valor",
color = "Leyenda"
) +
# Tema y estilos
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10),
legend.position = "bottom",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
library(prophet)
## Warning: package 'prophet' was built under R version 4.4.3
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.4.3
## Loading required package: rlang
## Warning: package 'rlang' was built under R version 4.4.3
##
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
##
## flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
## flatten_raw, invoke, splice
## The following object is masked from 'package:sparklyr':
##
## invoke
## The following object is masked from 'package:data.table':
##
## :=
model <- prophet(
cbind.data.frame(ds= as.Date(uf_timegpt$ds), y=uf_timegpt$y),
# Trend flexibility
growth = "linear",
changepoint.prior.scale = 0.05, # Reduced for smoother trend
n.changepoints = 50, # Increased from default 25
# Seasonality
yearly.seasonality = TRUE,
weekly.seasonality = TRUE,
daily.seasonality = FALSE, # Disabled for daily data
seasonality.mode = "additive",
seasonality.prior.scale = 15, # Increased to capture stronger seasonality
# Holidays (if applicable)
# holidays = generated_holidays # Create with add_country_holidays()
# Uncertainty intervals
interval.width = 0.95,
uncertainty.samples = 1000
)
future <- make_future_dataframe(model, periods = 298, include_history = T)
forecast <- predict(model, future)
forecast <- forecast[, c("ds", "yhat", "yhat_lower", "yhat_upper")]
forecast$pred <- ifelse(forecast$ds > max(uf_timegpt$ds), 1,0)
## Warning in check_tzones(e1, e2): 'tzone' attributes are inconsistent
forecast$ds <- as.Date(forecast$ds)
ggplot(forecast, aes(x = ds, y = yhat)) +
geom_ribbon(aes(ymin = yhat_lower, ymax = yhat_upper),
fill = "#9ecae1", alpha = 0.4) +
geom_line(color = "#08519c", linewidth = 0.8) +
geom_vline(xintercept = max(uf_timegpt$ds), color = "red", linetype = "dashed", linewidth=1) +
scale_x_date(date_breaks = "6 months", date_labels = "%y %b") +
scale_y_continuous(labels = scales::comma) +
labs(title = "Valores predichos (95%IC)",
# subtitle = "March 10, 2025 - May 7, 2025",
x = "Fecha",
y = "Valor",
# caption = "Source: Prophet Forecast Model"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(color = "gray50"),
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
plot.caption = element_text(color = "gray30")
)
# 1. Ensure dates are strictly Date objects
history_df <- uf_timegpt %>% mutate(ds = as.Date(ds))
fcst_df <- timegpt_fcst %>% mutate(ds = as.Date(ds))
# 2. Plotting (Notice we don't bind_rows, we layer them)
ggplot() +
# --- FORECAST LAYERS ---
# 95% Confidence Interval
geom_ribbon(data = fcst_df,
aes(x = ds, ymin = `TimeGPT-lo-95`, ymax = `TimeGPT-hi-95`),
fill = "#4B9CD3", alpha = 0.2) +
# 80% Confidence Interval
geom_ribbon(data = fcst_df,
aes(x = ds, ymin = `TimeGPT-lo-80`, ymax = `TimeGPT-hi-80`),
fill = "#4B9CD3", alpha = 0.3) +
# Forecast Line (Map y to the 'TimeGPT' column)
geom_line(data = fcst_df,
aes(x = ds, y = TimeGPT, color = "Pronóstico"), size = 1) +
# --- HISTORY LAYER ---
# History Line (Map y to the 'y' column)
geom_line(data = history_df,
aes(x = ds, y = y, color = "Histórico"), size = 1) +
# --- STYLING ---
geom_vline(xintercept = max(history_df$ds),
linetype = "dashed", color = "red", size = 0.8) +
scale_x_date(date_breaks = "3 months", date_labels = "%b %Y") +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
scale_color_manual(name = "Leyenda",
values = c("Histórico" = "black", "Pronóstico" = "#4B9CD3")) +
labs(
title = "Pronóstico de Serie Temporal con TimeGPT",
subtitle = "Modelo: timegpt-1-long-horizon",
x = "Fecha", y = "Valor"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(color = "gray50"),
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
plot.caption = element_text(color = "gray30")
)+
theme(legend.position = "bottom")
La proyección de la UF a 298 días más 2025-12-31 00:04:58 sería de: 26.644 pesos// Percentil 95% más alto proyectado: 35.062,81
Según TimeGPT: La proyección de la UF a 298 días más 2026-10-25 sería de: 40.226,56 pesos// Percentil 80% más alto proyectado: 41.542,98 pesos// Percentil 95% más alto proyectado: 41.902,11
Según prophet: La proyección de la UF a 298 días más 2026-10-25 sería de: 42.543 pesos// Percentil 95% más alto proyectado: 50.255
Ahora con un modelo ARIMA automático
arima_optimal_uf = forecast::auto.arima(ts_uf_proy)
autoplotly::autoplotly(forecast::forecast(arima_optimal_uf, h=298), ts.colour = "darkred",
predict.colour = "blue", predict.linetype = "dashed")%>%
plotly::layout(showlegend = F,
yaxis = list(title = "Gastos"),
xaxis = list(
title="Fecha",
ticktext = as.list(seq(from = as.Date("2018-01-01"),
to = as.Date("2018-01-01")+length(fit_tbats$fitted.values)+298, by = 90)),
tickvals = as.list(seq(from = as.numeric(as.Date("2018-01-01")),
to = as.numeric(as.Date("2018-01-01"))+length(fit_tbats$fitted.values)+298, by = 90)),
tickmode = "array",
tickangle = 90
))
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## i Please use tidy evaluation idioms with `aes()`.
## i See also `vignette("ggplot2-in-packages")` for more information.
## i The deprecated feature was likely used in the ggfortify package.
## Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
fr_fit_tbats_uf<-forecast::forecast(arima_optimal_uf, h=298)
dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats)),variable) %>% dplyr::summarise(max=max(value)) %>%
dplyr::right_join(dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats_uf)),variable) %>% dplyr::summarise(max=max(value)),by="variable") %>%
dplyr::mutate(variable=factor(variable,levels=c("Lo.95","Lo.80","Point.Forecast","Hi.80","Hi.95"))) %>%
dplyr::arrange(variable) %>%
knitr::kable(format="markdown", caption="Tabla. Estimación UF (de aquí a 298 días) según cálculos de gastos mensuales",
col.names= c("Item","UF Proyectada (TBATS)","UF Proyectada (ARIMA)"))
## No id variables; using all as measure variables
## No id variables; using all as measure variables
| Item | UF Proyectada (TBATS) | UF Proyectada (ARIMA) |
|---|---|---|
| Lo.95 | 26261.74 | 26322.41 |
| Lo.80 | 26393.55 | 26487.38 |
| Point.Forecast | 26644.35 | 26799.01 |
| Hi.80 | 31444.73 | 32178.58 |
| Hi.95 | 34326.70 | 35026.36 |
Lo haré en base a 2 cálculos: el gasto semanal y el gasto mensual en base a mis gastos desde marzo de 2019. La primera proyección la hice añadiendo el precio del arriendo mensual y partiendo en 2 (porque es con yo y Tami). No se incluye el último mes.
Gastos_casa_nvo <- readr::read_csv(as.character(path_sec),
col_names = c("Tiempo", "gasto", "fecha", "obs", "monto", "gastador",
"link"),skip=1) %>%
dplyr::mutate(fecha= lubridate::parse_date_time(fecha, c("%d/%m/%Y"),exact=T)) %>%
dplyr::mutate(fecha_month=strftime(fecha, format = "%Y-%m")) %>%
dplyr::mutate(day=as.Date(as.character(lubridate::floor_date(fecha, "day"))))
Gastos_casa_m <-
Gastos_casa_nvo %>% dplyr::group_by(fecha_month)%>%
dplyr::summarise(gasto_total=(sum(monto)+500000)/1000,fecha=first(fecha))%>%
data.frame()
uf_serie_corrected_m <-
uf_serie_corrected %>% dplyr::mutate(ano_m=paste0(anio,"-",sprintf("%02d",as.numeric(month)))) %>% dplyr::group_by(ano_m)%>%
dplyr::summarise(uf=(mean(value))/1000,fecha=first(date3))%>%
data.frame() %>%
dplyr::filter(fecha>="2019-02-28")
#Error: Error in standardise_path(file) : object 'enlace_gastos' not found
ts_uf_serie_corrected_m<-
ts(data = uf_serie_corrected_m$uf[-length(uf_serie_corrected_m$uf)],
start = 1,
end = nrow(uf_serie_corrected_m),
frequency = 1,
deltat = 1, ts.eps = getOption("ts.eps"))
ts_gastos_casa_m<-
ts(data = Gastos_casa_m$gasto_total[-length(Gastos_casa_m$gasto_total)],
start = 1,
end = nrow(Gastos_casa_m),
frequency = 1,
deltat = 1, ts.eps = getOption("ts.eps"))
fit_tbats_m <- forecast::tbats(ts_gastos_casa_m)
seq_dates<-format(seq(as.Date("2019/03/01"), by = "month", length = dim(Gastos_casa_m)[1]+12), "%m\n'%y")
autplo2t<-
autoplotly::autoplotly(forecast::forecast(fit_tbats_m, h=12), ts.colour = "darkred",
predict.colour = "blue", predict.linetype = "dashed")%>%
plotly::layout(showlegend = F,
yaxis = list(title = "Gastos (en miles)+ Arriendo"),
xaxis = list(
title="Fecha",
ticktext = as.list(seq_dates[seq(from = 1, to = (dim(Gastos_casa_m)[1]+12), by = 3)]),
tickvals = as.list(seq(from = 1, to = (dim(Gastos_casa_m)[1]+12), by = 3)),
tickmode = "array"#"array"
))
autplo2t
Ahora asumiendo un modelo ARIMA, e incluimos como regresor al precio de la UF.
paste0("Optimo pero sin regresor")
## [1] "Optimo pero sin regresor"
arima_optimal = forecast::auto.arima(ts_gastos_casa_m)
arima_optimal
## Series: ts_gastos_casa_m
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.4081 1037.757
## s.e. 0.1025 37.039
##
## sigma^2 = 40949: log likelihood = -550.86
## AIC=1107.71 AICc=1108.02 BIC=1114.93
paste0("Optimo pero con regresor")
## [1] "Optimo pero con regresor"
arima_optimal2 = forecast::auto.arima(ts_gastos_casa_m, xreg=as.numeric(ts_uf_serie_corrected_m[1:(length(Gastos_casa_m$gasto_total))]))
arima_optimal2
## Series: ts_gastos_casa_m
## Regression with ARIMA(1,0,0) errors
##
## Coefficients:
## ar1 xreg
## 0.5174 31.4088
## s.e. 0.0935 1.3418
##
## sigma^2 = 39541: log likelihood = -549.49
## AIC=1104.97 AICc=1105.28 BIC=1112.19
forecast_uf<-
cbind.data.frame(fecha=as.Date(seq(as.numeric(as.Date(uf_serie_corrected$date3[length(uf_serie_corrected$date3)])),(as.numeric(as.Date(uf_serie_corrected$date3[length(uf_serie_corrected$date3)]))+299),by=1), origin = "1970-01-01"),forecast::forecast(fit_tbats, h=300)) %>%
dplyr::mutate(ano_m=stringr::str_extract(fecha,".{7}")) %>%
dplyr::group_by(ano_m)%>%
dplyr::summarise(uf=(mean(`Hi 95`,na.rm=T))/1000,fecha=first(fecha))%>%
data.frame()
autplo2t2<-
autoplotly::autoplotly(forecast::forecast(arima_optimal2,xreg=c(forecast_uf$uf[1],forecast_uf$uf), h=12), ts.colour = "darkred",
predict.colour = "blue", predict.linetype = "dashed")%>%
plotly::layout(showlegend = F,
yaxis = list(title = "Gastos (en miles)"),
xaxis = list(
title="Fecha",
ticktext = as.list(seq_dates[seq(from = 1, to = (dim(Gastos_casa_m)[1]+12), by = 3)]),
tickvals = as.list(seq(from = 1, to = (dim(Gastos_casa_m)[1]+12), by = 3)),
tickmode = "array"#"array"
))
autplo2t2
fr_fit_tbats_m<-forecast::forecast(fit_tbats_m, h=12)
fr_fit_tbats_m2<-forecast::forecast(arima_optimal, h=12)
fr_fit_tbats_m3<-forecast::forecast(arima_optimal2, h=12,xreg=c(forecast_uf$uf[1],forecast_uf$uf))
dplyr::right_join(dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats_m3)),variable) %>% dplyr::summarise(max=max(value)), dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats_m2)),variable) %>% dplyr::summarise(max=max(value)),by="variable") %>%
dplyr::right_join(dplyr::group_by(reshape2::melt(data.frame(fr_fit_tbats_m)),variable) %>% dplyr::summarise(max=max(value)),by="variable") %>%
dplyr::mutate(variable=factor(variable,levels=c("Lo.95","Lo.80","Point.Forecast","Hi.80","Hi.95"))) %>%
dplyr::arrange(variable) %>%
knitr::kable(format="markdown", caption="Estimación en miles de la plata a gastar en el futuro (de aquí a 12 meses) según cálculos de gastos mensuales",
col.names= c("Item","Modelo ARIMA con regresor (UF)","Modelo ARIMA sin regresor","Modelo TBATS"))
## No id variables; using all as measure variables
## No id variables; using all as measure variables
## No id variables; using all as measure variables
| Item | Modelo ARIMA con regresor (UF) | Modelo ARIMA sin regresor | Modelo TBATS |
|---|---|---|---|
| Lo.95 | 617.8096 | 603.3110 | 676.3105 |
| Lo.80 | 775.4549 | 753.6858 | 770.6065 |
| Point.Forecast | 1073.2539 | 1037.7506 | 1005.4035 |
| Hi.80 | 1371.0529 | 1321.8154 | 1330.4048 |
| Hi.95 | 1528.6982 | 1472.1902 | 1542.7990 |
path_sec2<- paste0("https://docs.google.com/spreadsheets/d/",Sys.getenv("SUPERSECRET"),"/export?format=csv&id=",Sys.getenv("SUPERSECRET"),"&gid=847461368")
Gastos_casa_mensual_2022 <- readr::read_csv(as.character(path_sec2),
#col_names = c("Tiempo", "gasto", "fecha", "obs", "monto", "gastador","link"),
skip=0)
## Rows: 83 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): mes_ano
## dbl (3): n, Tami, Andrés
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Gastos_casa_mensual_2022,5) %>%
knitr::kable("markdown",caption="Resumen mensual, primeras 5 observaciones")
| n | mes_ano | Tami | Andrés |
|---|---|---|---|
| 1 | marzo_2019 | 175533 | 68268 |
| 2 | abril_2019 | 152640 | 55031 |
| 3 | mayo_2019 | 152985 | 192219 |
| 4 | junio_2019 | 291067 | 84961 |
| 5 | julio_2019 | 241389 | 205893 |
(
Gastos_casa_mensual_2022 %>%
reshape2::melt(id.var=c("n","mes_ano")) %>%
dplyr::mutate(gastador=as.factor(variable)) %>%
dplyr::select(-variable) %>%
ggplot2::ggplot(aes(x = n, y = value, color=gastador)) +
scale_color_manual(name="Gastador", values=c("red", "blue"))+
geom_line(size=1) +
#geom_vline(xintercept = as.Date("2019-06-24"),linetype = "dashed") +
labs(y="Gastos (en miles)",x="Meses", subtitle="Azul= Tami; Rojo= Andrés") +
ggtitle( "Gastos Mensuales (total manual)") +
scale_y_continuous(labels = f <- function(x) paste0(x/1000)) +
# scale_color_manual(name = "Gastador", values= c("blue", "red"), labels = c("Tami", "Andrés")) +
# scale_x_yearweek(breaks = "1 month", minor_breaks = "1 week", labels=date_format("%m/%y")) +
# guides(color = F)+
theme_custom() +
theme(axis.text.x = element_text(vjust = 0.5,angle = 35)) +
theme(
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black")
)
) %>% ggplotly()
Gastos_casa_mensual_2022$mes_ano <- gsub("marzo", "Mar", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("abril", "Apr", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("mayo", "May", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("junio", "Jun", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("julio", "Jul", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("agosto", "Aug", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("septiembre", "Sep", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("octubre", "Oct", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("noviembre", "Nov", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("diciembre", "Dec", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("enero", "Jan", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022$mes_ano <- gsub("febrero", "Feb", Gastos_casa_mensual_2022$mes_ano)
Gastos_casa_mensual_2022<- dplyr::filter(Gastos_casa_mensual_2022, !is.na(Tami))
Gastos_casa_mensual_2022$mes_ano <- parse_date_time(Gastos_casa_mensual_2022$mes_ano, "%b_%Y")
Gastos_casa_mensual_2022$mes_ano <- as.Date(as.character(Gastos_casa_mensual_2022$mes_ano))
Gastos_casa_mensual_2022_timegpt <- Gastos_casa_mensual_2022 %>%
mutate(value = Tami + Andrés) %>%
rename(ds = mes_ano, y = value) %>%
mutate(#ds= format(ds, "%Y-%m"),
unique_id = "1") %>% #it is only one series
select(unique_id, ds, y)
#Convertir la base de UF a mensual
uf_timegpt_my <- uf_serie_corrected %>%
dplyr::rename(ds = date3, y = value) %>%
dplyr::mutate(ds = format(ds, "%Y-%m-%d")) %>%
dplyr::mutate(unique_id = "serie_1")%>%
dplyr::select(unique_id, ds, y) %>%
mutate(ds = ymd(ds)) %>% # Convert 'ds' to Date
mutate(month = month(ds), year = year(ds)) %>% # Extract month and year
group_by(month, year) %>% # Group by month and year
summarise(average_y = mean(y))%>% # Calculate average y
mutate(ds = as.Date(paste0(year,"-",month, "-01")))%>%
ungroup()%>%
select(ds, uf=average_y)
Gastos_casa_mensual_2022_timegpt_ex<-
Gastos_casa_mensual_2022_timegpt |>
dplyr::left_join(uf_timegpt_my, by=c("ds"="ds"))
#Historical Exogenous Variables: These should be included in the input data immediately following the id_col, ds, and y columns
gastos_timegpt_fcst <- nixtlar::nixtla_client_forecast(
Gastos_casa_mensual_2022_timegpt_ex,
h = 12,
freq = "M", # Monthly frequency
add_history = F,
level = c(80, 95),
model = "timegpt-1",#"timegpt-1-long-horizon",
clean_ex_first = TRUE
)
# Convert 'ds' to Date format in both tables
Gastos_casa_mensual_2022_timegpt_corr <- Gastos_casa_mensual_2022_timegpt %>%
mutate(ds = as.Date(paste0(ds, "-01"))) # Add day to make it a complete date
gastos_timegpt_fcst <- gastos_timegpt_fcst %>%
mutate(ds = as.Date(paste0(ds, "-01"))) # Add day to make it a complete date
# Combine historical and forecast data
full_data_gastos <- bind_rows(
Gastos_casa_mensual_2022_timegpt_corr %>% mutate(type = "Histórico"),
gastos_timegpt_fcst %>% mutate(type = "Pronóstico")
)
full_data_gastos |>
dplyr::mutate(y= ifelse(is.na(y),TimeGPT/1000, y/1000)) |>
# Visualize results
ggplot(aes(x = ds, y = y)) +
geom_ribbon(aes(ymin = `TimeGPT-lo-95`/1000, ymax = `TimeGPT-hi-95`/1000),
fill = "#4B9CD3", alpha = 0.2) +
geom_ribbon(aes(ymin = `TimeGPT-lo-80`/1000, ymax = `TimeGPT-hi-80`/1000),
fill = "#4B9CD3", alpha = 0.3) +
geom_line(aes(color = type), linewidth = 1.5) +
geom_vline(xintercept = max(filter(full_data_gastos, type == "Histórico")$ds),
linetype = "dashed", color = "red", linewidth = 0.8) +
scale_x_date(
date_breaks = "3 months",
date_labels = "%b %Y"
) +
scale_y_continuous(
name = "Gastos Totales",
labels = scales::comma#,
# breaks = pretty(full_data_gastos$y, n = 10),
# expand = expansion(mult = c(0.05, 0.05))
) +
scale_color_manual(
name = "Leyenda",
values = c("Histórico" = "black", "Pronóstico" = "#4B9CD3")
) +
labs(
title = "Pronóstico de Gastos Mensuales (TimeGPT, ajustando por UF promedio mensual)",
subtitle = "Intervalos de confianza al 80% (más oscuro) y 95% (más claro)",
x = "Fecha",
y = "Gastos Totales",
color = "Leyenda"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title.x = element_text(size = 10),
axis.title.y = element_text(size = 10),
legend.position = "bottom",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
Sys.getenv("R_LIBS_USER")
## [1] "D:\\a\\_temp\\Library"
sessionInfo()
## R version 4.4.0 (2024-04-24 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows Server 2022 x64 (build 26100)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=Spanish_Chile.1252 LC_CTYPE=Spanish_Chile.1252
## [3] LC_MONETARY=Spanish_Chile.1252 LC_NUMERIC=C
## [5] LC_TIME=Spanish_Chile.1252
## system code page: 65001
##
## time zone: UTC
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] prophet_1.0 rlang_1.1.6 Rcpp_1.1.0 scales_1.4.0
## [5] ggiraph_0.9.2 tidytext_0.4.3 DT_0.34.0 janitor_2.2.1
## [9] autoplotly_0.1.4 rvest_1.0.5 plotly_4.11.0 xts_0.14.1
## [13] forecast_8.24.0 wordcloud_2.6 RColorBrewer_1.1-3 SnowballC_0.7.1
## [17] tm_0.7-17 NLP_0.3-2 tsibble_1.1.6 lubridate_1.9.4
## [21] forcats_1.0.1 dplyr_1.1.4 purrr_1.2.0 tidyr_1.3.1
## [25] tibble_3.3.0 tidyverse_2.0.0 gsynth_1.2.1 sjPlot_2.9.0
## [29] lattice_0.22-6 GGally_2.4.0 ggplot2_4.0.1 gridExtra_2.3
## [33] plotrix_3.8-13 sparklyr_1.9.3 httr_1.4.7 readxl_1.4.5
## [37] zoo_1.8-15 stringr_1.6.0 stringi_1.8.7 DataExplorer_0.8.4
## [41] data.table_1.17.8 reshape2_1.4.5 fUnitRoots_4052.82 plyr_1.8.9
## [45] readr_2.1.6
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-9 cellranger_1.1.0 lifecycle_1.0.4
## [4] httr2_1.2.1 StanHeaders_2.32.10 doParallel_1.0.17
## [7] globals_0.18.0 vroom_1.6.6 MASS_7.3-60.2
## [10] crosstalk_1.2.2 magrittr_2.0.4 sass_0.4.10
## [13] rmarkdown_2.30 jquerylib_0.1.4 yaml_2.3.10
## [16] fracdiff_1.5-3 otel_0.2.0 doRNG_1.8.6.2
## [19] askpass_1.2.1 pkgbuild_1.4.8 DBI_1.2.3
## [22] abind_1.4-8 quadprog_1.5-8 nnet_7.3-19
## [25] rappdirs_0.3.3 sandwich_3.1-1 gdtools_0.4.4
## [28] inline_0.3.21 data.tree_1.2.0 tokenizers_0.3.0
## [31] listenv_0.10.0 anytime_0.3.12 spatial_7.3-17
## [34] parallelly_1.45.1 codetools_0.2-20 xml2_1.5.0
## [37] tidyselect_1.2.1 farver_2.1.2 urca_1.3-4
## [40] matrixStats_1.5.0 stats4_4.4.0 jsonlite_2.0.0
## [43] ellipsis_0.3.2 Formula_1.2-5 iterators_1.0.14
## [46] systemfonts_1.3.1 foreach_1.5.2 tools_4.4.0
## [49] glue_1.8.0 xfun_0.54 TTR_0.24.4
## [52] ggfortify_0.4.19 loo_2.8.0 withr_3.0.2
## [55] timeSeries_4041.111 fastmap_1.2.0 openssl_2.3.4
## [58] caTools_1.18.3 digest_0.6.39 timechange_0.3.0
## [61] R6_2.6.1 lfe_3.1.1 colorspace_2.1-2
## [64] networkD3_0.4.1 gtools_3.9.5 generics_0.1.4
## [67] fontLiberation_0.1.0 htmlwidgets_1.6.4 ggstats_0.11.0
## [70] pkgconfig_2.0.3 gtable_0.3.6 timeDate_4051.111
## [73] lmtest_0.9-40 S7_0.2.1 selectr_0.5-0
## [76] janeaustenr_1.0.0 htmltools_0.5.8.1 fontBitstreamVera_0.1.1
## [79] tseries_0.10-58 snakecase_0.11.1 knitr_1.51
## [82] rstudioapi_0.17.1 tzdb_0.5.0 nlme_3.1-164
## [85] curl_7.0.0 cachem_1.1.0 KernSmooth_2.23-22
## [88] parallel_4.4.0 fBasics_4041.97 pillar_1.11.1
## [91] vctrs_0.6.5 gplots_3.3.0 slam_0.1-55
## [94] dbplyr_2.5.1 xtable_1.8-4 evaluate_1.0.5
## [97] mvtnorm_1.3-3 cli_3.6.5 compiler_4.4.0
## [100] crayon_1.5.3 rngtools_1.5.2 future.apply_1.20.0
## [103] labeling_0.4.3 rstan_2.32.7 QuickJSR_1.8.1
## [106] viridisLite_0.4.2 lazyeval_0.2.2 fontquiver_0.2.1
## [109] Matrix_1.7-0 hms_1.1.4 bit64_4.6.0-1
## [112] future_1.68.0 nixtlar_0.6.2 extraDistr_1.10.0
## [115] igraph_2.2.1 RcppParallel_5.1.11-1 bslib_0.9.0
## [118] quantmod_0.4.28 bit_4.6.0
#save.image("__analisis.RData")
sesion_info <- devtools::session_info()
dplyr::select(
tibble::as_tibble(sesion_info$packages),
c(package, loadedversion, source)
) %>%
DT::datatable(filter = 'top', colnames = c('Row number' =1,'Variable' = 2, 'Percentage'= 3),
caption = htmltools::tags$caption(
style = 'caption-side: top; text-align: left;',
'', htmltools::em('Packages')),
options=list(
initComplete = htmlwidgets::JS(
"function(settings, json) {",
"$(this.api().tables().body()).css({
'font-family': 'Helvetica Neue',
'font-size': '50%',
'code-inline-font-size': '15%',
'white-space': 'nowrap',
'line-height': '0.75em',
'min-height': '0.5em'
});",#;
"}")))